data_sumtable <-
readxl::read_excel(path = "Summary_TR_predictions_with_rs.xlsx", sheet = "RQ0_table")
data_acc <-
readxl::read_excel(path = "Summary_TR_predictions_with_rs.xlsx", sheet = "RQ1_acc")
data_imp_features <-
readxl::read_excel(path = "Summary_TR_predictions_with_rs.xlsx", sheet = "RQ2_Important_features", na = "NA")
data_dimreduction <-
readxl::read_excel(path = "Summary_TR_predictions_with_rs.xlsx", sheet = "RQ3_reducing")
data_PROBAST <- readxl::read_excel(path = "PROBAST_assessment.xlsx", sheet = "Main")
colnames(data_PROBAST) <- data_PROBAST[1,]
data_PROBAST <- data_PROBAST[2:14,]
# Replace spaces and hyphens in column names with _
colnames(data_acc) <- gsub(" ","_",colnames(data_acc))
colnames(data_imp_features) <- gsub(" - ","_",colnames(data_imp_features))
colnames(data_imp_features) <- gsub(" ","_",colnames(data_imp_features))
colnames(data_dimreduction) <- gsub(" ","_",colnames(data_dimreduction))
## Step 1: Modify table entries
# Column: Responders/nonresponders
data_sumtable$`Responders/nonresponders` <- gsub(" responders, ","/",data_sumtable$`Responders/nonresponders`)
data_sumtable$`Responders/nonresponders` <- gsub(" nonresponders","",data_sumtable$`Responders/nonresponders`)
data_sumtable$`Responders/nonresponders` <- gsub(" remitters, ","/",data_sumtable$`Responders/nonresponders`)
data_sumtable$`Responders/nonresponders` <- gsub(" nonremitters","",data_sumtable$`Responders/nonresponders`)
# Add proportion of nonresponders
# proportion_nonresponders <- character(nrow(data_sumtable))
# row_idx = 8
# for (row_idx in 1:nrow(data_sumtable)){
# strsplit(data_sumtable$`Responders/nonresponders`[row_idx], "\n")
# if
# entry
# entry <- data_sumtable$`Responders/nonresponders`[row_idx]
# responders <- strsplit(data_sumtable$`Responders/nonresponders`[row_idx], c("; \r\n"))[[1]][1]
# nonresponders <- strsplit(entry, "/")[[1]][2]
# proportion <- round(as.numeric(nonresponders)/(as.numeric(responders)+as.numeric(nonresponders))*100,0)
# entry <- paste(data_sumtable$`Responders/nonresponders`[row_idx]," (",proportion,"%)", sep = "")
# }
# Column: Treatment
data_sumtable$Treatment <- gsub("atypical antipsychotics","AAP",data_sumtable$Treatment)
#data_sumtable$treatment <- gsub("Alpha2-receptor-antagonists","AAP",data_sumtable$treatment)
# Column: Information on models tested
data_sumtable$`Information on models tested` <- gsub(" of models tested","",data_sumtable$`Information on models tested`)
# Column: Definition of treatment outcome
data_sumtable$`Definition of treatment outcome` <- gsub("\\s*\\([^\\)]+\\)","",data_sumtable$`Definition of treatment outcome`)
# Column: Input features
data_sumtable$`Type of functional-connectivity-based input features` <- gsub("[(]wb:.*)","",data_sumtable$`Type of functional-connectivity-based input features`)
# Column: FC estimation
data_sumtable$`Way of estimating the underlying functional connectivities`<- gsub("Group-information guided","Gig",data_sumtable$`Way of estimating the underlying functional connectivities`)
# Column: Algorithms
data_sumtable$`Algorithm(s) of the final classifier(s)`<- gsub("graph convolutional network","GCN",data_sumtable$`Algorithm(s) of the final classifier(s)`)
## Step 2: delete columns (Age group and Year)
data_sumtable$`Age group`<- NULL
data_sumtable$Year <- NULL
## Step 3: change column names
colnames(data_sumtable) <- gsub("Sample size","N",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Balanced_acc_final","Best Acc",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Responders/nonresponders","Responders/ nonresponders",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Definition of treatment outcome","Definition treatment outcome",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Type of functional-connectivity-based input features","Input features",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Way of estimating the underlying functional connectivities","Estimating FCs",colnames(data_sumtable))
## Step 4: turn values of numeric variables (N and Best ACC) into strings
data_sumtable$N <- as.character(data_sumtable$N)
data_sumtable$`Best Acc` <- as.character(data_sumtable$`Best Acc`)
# More info about settings in flextable: https://ardata-fr.github.io/flextable-book/cell-content.html
# Set defaults
set_flextable_defaults(font.family = "Arial",
font.size = 8,
padding.bottom = 3,
padding.top = 3,
padding.left = 0.5,
paddings.right = 0.5,
#theme_fun = "theme_apa",
theme_fun = NULL,
text.align = "left",
line_spacing = 1.5)
# Initialize flex_table
apa_sumtable <- flextable(data_sumtable)
# Set table properties
apa_sumtable <- set_table_properties(apa_sumtable, width = 1, layout = "autofit")
# Save flextable to word
margins <- page_mar(
bottom = 0.5,
top = 0.5,
right = 0.5,
left = 0.5,
header = 0.5,
footer = 0.5,
gutter = 0.5
)
format_table <- prop_section(
page_size = page_size(orient = "landscape"),
page_margins = margins)
flextable::save_as_docx(apa_sumtable, path = "plots/table_1_extraction.docx", pr_section = format_table)
data_sumtable
data_acc_meta <- merge(data_acc, data_sumtable[,c("Study","Primary disorder")])
data_acc_meta$`Primary disorder` <- gsub(" & BPD","",data_acc_meta$`Primary disorder`)
data_acc_meta$`Primary disorder` <- gsub("\\(partial\\) ","",data_acc_meta$`Primary disorder`)
data_acc_meta$`Primary disorder`
## [1] "MDD" "MDD" "MDD" "MDD" "MDD" "MDD" "MDD" "MDD" "MDD" "MDD"
## [11] "MDD" "PTSD" "PTSD"
colnames(data_acc_meta) <- gsub(" ","_",colnames(data_acc_meta))
studies_low_ROB <- c(data_PROBAST[data_PROBAST$`4.8 Were model overfitting and optimism in model performance accounted for?` == "Y",c("Study")])$Study
data_acc_meta$ROB <- ifelse(data_acc_meta$Study %in% studies_low_ROB, "low risk of data leakage", "high risk of data leakage")
library(meta)
library(metafor)
# Add column with correctly classified classes per study
data_acc_meta$n_correct_cases <- round(data_acc_meta$Balanced_acc_final * 0.01 * data_acc_meta$Sample_size,0)
data_acc_meta$Treatment <- c("rTMS","medication","rTMS","medication","ECT","medication","medication and psychotherapy","ECT","medication","ECT","medication","psychotherapy","psychotherapy")
# Pool proportions
m.prop <- metaprop(event = n_correct_cases,
n = Sample_size,
studlab = Study, # column for study name
data = data_acc_meta,
method = "Inverse",
method.ci = "WS",
sm = "PFT", # Freeman-Turkey Double Arcsine transformation ODER Arcsine transformation
fixed = FALSE,
random = TRUE,
hakn = TRUE, # Hartung-Knapp adjustment
title = "Predicting treatment response")
summary(m.prop)
## Review: Predicting treatment response
##
## proportion 95%-CI %W(random)
## Drysdale, 2017 0.7823 [0.7017; 0.8458] 9.3
## Harris, 2022 0.5833 [0.5017; 0.6607] 9.5
## Hopman, 2021 0.8525 [0.7428; 0.9204] 7.8
## Kong, 2021 0.8902 [0.8044; 0.9412] 8.5
## Moreno-Ortega, 2019 0.8889 [0.6720; 0.9690] 4.5
## Pei, 2020 0.8061 [0.7169; 0.8722] 8.8
## Schultz, 2018 0.7143 [0.5004; 0.8619] 4.9
## Sun, 2020 0.6721 [0.5847; 0.7491] 9.2
## Tian, 2020 0.6792 [0.5855; 0.7605] 9.0
## van Waarde, 2015 0.8444 [0.7122; 0.9225] 7.0
## Wu, 2022 0.8060 [0.6958; 0.8829] 8.0
## Zhutovsky, 2019 0.8182 [0.6804; 0.9049] 6.9
## Zhutovsky, 2021 0.7500 [0.5981; 0.8581] 6.7
##
## Number of studies: k = 13
## Number of observations: o = 972
## Number of events: e = 728
##
## proportion 95%-CI
## Random effects model 0.7748 [0.7164; 0.8284]
##
## Quantifying heterogeneity:
## tau^2 = 0.0087 [0.0026; 0.0264]; tau = 0.0933 [0.0508; 0.1625]
## I^2 = 74.7% [56.4%; 85.3%]; H = 1.99 [1.51; 2.61]
##
## Test of heterogeneity:
## Q d.f. p-value
## 47.45 12 < 0.0001
##
## Details on meta-analytical method:
## - Inverse variance method
## - Restricted maximum-likelihood estimator for tau^2
## - Q-Profile method for confidence interval of tau^2 and tau
## - Hartung-Knapp adjustment for random effects model (df = 12)
## - Freeman-Tukey double arcsine transformation
## - Wilson Score confidence interval for individual studies
# Forest plot
svg(file='plots/forestplot.svg', width = 9, height = 5)
forest_plot <- forest(m.prop,
sortvar = TE,
prediction = TRUE,
print.tau2 = FALSE,
rightlabs = c("Balanced accuracy", "95%-CI", "Weight")
#leftlabs = c("Study",)
)
dev.off()
## png
## 2
Please note that we did not report the publication bias in our meta-analysis as this is not recommended in the case of high heterogeneity.
funnel(m.prop)
metabias(m.prop)
## Review: Predicting treatment response
##
## Linear regression test of funnel plot asymmetry
##
## Test result: t = 1.72, df = 11, p-value = 0.1134
## Bias estimate: 3.1678 (SE = 1.8416)
##
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 3.3991)
## - predictor: standard error
## - weight: inverse variance
## - reference: Egger et al. (1997), BMJ
metabias(m.prop, method.bias = "peters")
## Review: Predicting treatment response
##
## Linear regression test of funnel plot asymmetry
##
## Test result: t = 1.41, df = 11, p-value = 0.1876
## Bias estimate: 4.7621 (SE = 3.3891)
##
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 0.1547)
## - predictor: inverse of total sample size
## - weight: inverse variance of average event probability
## - reference: Peters et al. (2006), JAMA
update(m.prop,
subgroup = Treatment,
tau.common = FALSE)
## Review: Predicting treatment response
##
## Number of studies: k = 13
## Number of observations: o = 972
## Number of events: e = 728
##
## proportion 95%-CI
## Random effects model 0.7748 [0.7164; 0.8284]
##
## Quantifying heterogeneity:
## tau^2 = 0.0087 [0.0026; 0.0264]; tau = 0.0933 [0.0508; 0.1625]
## I^2 = 74.7% [56.4%; 85.3%]; H = 1.99 [1.51; 2.61]
##
## Test of heterogeneity:
## Q d.f. p-value
## 47.45 12 < 0.0001
##
## Results for subgroups (random effects model):
## k proportion 95%-CI tau^2
## Treatment = rTMS 2 0.8089 [0.3054; 1.0000] 0.0007
## Treatment = medication 5 0.7578 [0.5938; 0.8907] 0.0174
## Treatment = ECT 3 0.7891 [0.4715; 0.9862] 0.0130
## Treatment = medication and psychotherapy 1 0.7143 [0.4996; 0.8908] --
## Treatment = psychotherapy 2 0.7867 [0.2848; 1.0000] 0
## tau Q I^2
## Treatment = rTMS 0.0259 1.22 18.1%
## Treatment = medication 0.1317 33.68 88.1%
## Treatment = ECT 0.1140 7.45 73.2%
## Treatment = medication and psychotherapy -- 0.00 --
## Treatment = psychotherapy 0 0.56 0.0%
##
## Test for subgroup differences (random effects model):
## Q d.f. p-value
## Between groups 1.42 4 0.8409
##
## Details on meta-analytical method:
## - Inverse variance method
## - Restricted maximum-likelihood estimator for tau^2
## - Q-Profile method for confidence interval of tau^2 and tau
## - Hartung-Knapp adjustment for random effects model (df = 12)
## - Freeman-Tukey double arcsine transformation
update(m.prop,
subgroup = Primary_disorder,
tau.common = FALSE)
## Review: Predicting treatment response
##
## Number of studies: k = 13
## Number of observations: o = 972
## Number of events: e = 728
##
## proportion 95%-CI
## Random effects model 0.7748 [0.7164; 0.8284]
##
## Quantifying heterogeneity:
## tau^2 = 0.0087 [0.0026; 0.0264]; tau = 0.0933 [0.0508; 0.1625]
## I^2 = 74.7% [56.4%; 85.3%]; H = 1.99 [1.51; 2.61]
##
## Test of heterogeneity:
## Q d.f. p-value
## 47.45 12 < 0.0001
##
## Results for subgroups (random effects model):
## k proportion 95%-CI tau^2 tau Q
## Primary_disorder = MDD 11 0.7738 [0.7039; 0.8371] 0.0104 0.1017 46.47
## Primary_disorder = PTSD 2 0.7867 [0.2848; 1.0000] 0 0 0.56
## I^2
## Primary_disorder = MDD 78.5%
## Primary_disorder = PTSD 0.0%
##
## Test for subgroup differences (random effects model):
## Q d.f. p-value
## Between groups 0.06 1 0.7988
##
## Details on meta-analytical method:
## - Inverse variance method
## - Restricted maximum-likelihood estimator for tau^2
## - Q-Profile method for confidence interval of tau^2 and tau
## - Hartung-Knapp adjustment for random effects model (df = 12)
## - Freeman-Tukey double arcsine transformation
update(m.prop,
subgroup = ROB,
tau.common = FALSE)
## Review: Predicting treatment response
##
## Number of studies: k = 13
## Number of observations: o = 972
## Number of events: e = 728
##
## proportion 95%-CI
## Random effects model 0.7748 [0.7164; 0.8284]
##
## Quantifying heterogeneity:
## tau^2 = 0.0087 [0.0026; 0.0264]; tau = 0.0933 [0.0508; 0.1625]
## I^2 = 74.7% [56.4%; 85.3%]; H = 1.99 [1.51; 2.61]
##
## Test of heterogeneity:
## Q d.f. p-value
## 47.45 12 < 0.0001
##
## Results for subgroups (random effects model):
## k proportion 95%-CI tau^2 tau
## ROB = low risk of data leakage 9 0.7619 [0.6852; 0.8312] 0.0095 0.0976
## ROB = high risk of data leakage 4 0.8071 [0.6459; 0.9301] 0.0089 0.0944
## Q I^2
## ROB = low risk of data leakage 37.06 78.4%
## ROB = high risk of data leakage 9.49 68.4%
##
## Test for subgroup differences (random effects model):
## Q d.f. p-value
## Between groups 0.58 1 0.4459
##
## Details on meta-analytical method:
## - Inverse variance method
## - Restricted maximum-likelihood estimator for tau^2
## - Q-Profile method for confidence interval of tau^2 and tau
## - Hartung-Knapp adjustment for random effects model (df = 12)
## - Freeman-Tukey double arcsine transformation
m.prop.reg <- metareg(m.prop, ~Sample_size)
m.prop.reg
##
## Mixed-Effects Model (k = 13; tau^2 estimator: REML)
##
## tau^2 (estimated amount of residual heterogeneity): 0.0047 (SE = 0.0036)
## tau (square root of estimated tau^2 value): 0.0685
## I^2 (residual heterogeneity / unaccounted variability): 57.79%
## H^2 (unaccounted variability / sampling variability): 2.37
## R^2 (amount of heterogeneity accounted for): 46.04%
##
## Test for Residual Heterogeneity:
## QE(df = 11) = 26.1344, p-val = 0.0062
##
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 11) = 6.3943, p-val = 0.0280
##
## Model Results:
##
## estimate se tval df pval ci.lb ci.ub
## intrcpt 1.2096 0.0620 19.5100 11 <.0001 1.0731 1.3460 ***
## Sample_size -0.0017 0.0007 -2.5287 11 0.0280 -0.0031 -0.0002 *
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
bubble(m.prop.reg, studlab = TRUE)
The type of treatment has no substantial effect on the accuracy or heterogeneity. MDD alone is not much different. Risk of data leakage has small effect. The sample size has a substantial effect on the accuracy.
The mean raw accuracy of the best model per study is 80%, with a range of 61% - 90%. The “balanced mean accuracy” is 78%, with a range of 58% - 89%.
# Add column for ROB to data_acc using information from 4.8. in data_PROBAST
studies_low_ROB <- c(data_PROBAST[data_PROBAST$`4.8 Were model overfitting and optimism in model performance accounted for?` == "Y",c("Study")])$Study
data_acc$ROB <- ifelse(data_acc$Study %in% studies_low_ROB, "low risk of data leakage", "high risk of data leakage")
# Bring data_acc into longformat
data_acc_long <- reshape(data = data_acc, idvar = "Study", varying = c("Reported_Accuracy_rounded","Balanced_acc_final"), v.name = "metric", times = c("Reported_Accuracy_rounded","Balanced_acc_final"), new.row.names = 1:1000, direction = "long")
data_acc_long$time <- as.factor(data_acc_long$time)
data_acc_long$time <- factor(data_acc_long$time, levels = c("Reported_Accuracy_rounded", "Balanced_acc_final"))
# Plot data without risk of bias
labels_acc_germ <- c("berichtete Genauigkeit","für Zufallsniveau\nkontrollierte Genauigkeit")
labels_acc_eng <- c("reported\naccuracy","balanced\naccuracy")
plot_acc_contr_violin <- ggplot(data = data_acc_long, aes(y =`metric`, x = `time`))+
geom_violin() +
geom_quasirandom(size = size_dots) +
stat_summary(
geom = "point",
fun.y = "mean",
col = "black",
size = size_dots,
shape = 24,
fill = "red"
)+
labs(x="", y = "Accuracy in %")+
scale_x_discrete(labels = labels_acc_eng)
# Plot data with risk of bias
plot_acc_contr_violin_rob <- ggplot(data = data_acc_long, aes(y =`metric`, x = `time`))+
geom_violin() +
geom_quasirandom(size = size_dots-0.5, aes(colour = `ROB`)) +
scale_colour_manual(name = "Risk of bias", values=c("grey69","black"))+
stat_summary(
geom = "point",
fun.y = "mean",
col = "black",
size = size_dots -0.5,
shape = 24,
fill = "red"
)+
labs(x="", y = "Accuracy in %")+
theme(legend.position = "bottom")+
theme(legend.title=element_blank()) +
scale_x_discrete(labels = labels_acc_eng)+
guides(color = guide_legend(nrow = 2, byrow = TRUE))
# Display and Save both plots
plot_acc_contr_violin
plot_acc_contr_violin_rob
ggsave("plots/violin_acc_normal_and_controlled.svg", plot_acc_contr_violin)
ggsave("plots/violin_acc_normal_and_controlled_rob.svg",plot_acc_contr_violin_rob, height = 4)
The mean sample size was N = 75, with a range of [18, 144].
# Add column for type of treatment manually to data_acc
data_acc$Treatment <- c("rTMS","medication","rTMS","medication","ECT","medication","medication and psychotherapy","ECT","medication","ECT","medication","psychotherapy","psychotherapy")
# Plot settings
size_half = size_dots * 1.5
pos_half_vert = 0.4
pos_half_horiz = 0.2
# Plot sample size and balanced accuracy
plot_acc_contr_sample_size <- ggplot(data = data_acc, aes(y =`Balanced_acc_final`, x = `Sample_size`))+
theme(text = element_text(family = "Arial"))+
geom_point(aes(color = `Treatment`), size = size_dots)+ #draw point for all treatments
geom_text(data = data_acc[data_acc$Treatment == "medication and psychotherapy",],
label = "\u25D7",
aes(`Sample_size`, `Balanced_acc_final`,colour = "medication"),
size= size_half,
hjust = pos_half_horiz,
vjust = pos_half_vert,
family = "Lucida Sans Unicode",
key_glyph = draw_key_blank)+
geom_text(data = data_acc[data_acc$Treatment == "medication and psychotherapy",],
label = "\u25D6",
aes(`Sample_size`, `Balanced_acc_final`,colour = "psychotherapy"),
size= size_half,
hjust = pos_half_horiz + 0.57,
vjust= pos_half_vert,
family = "Lucida Sans Unicode",
key_glyph = draw_key_blank)+
scale_color_manual(
breaks = c("medication", "ECT","psychotherapy","rTMS"),
values = c("ECT" = color_pal[1], "medication" = color_pal[2], "medication and psychotherapy" = "grey","psychotherapy" = color_pal[3], "rTMS" = color_pal[4])
)+
geom_smooth(method = "lm", color = "black", size = 0.5)+
labs(y = "Balanced accuracy in %", x = "Sample size")+
scale_x_continuous(limits=c(15,147), breaks = c(25,50,75,100,125,150)) +
theme(legend.position = "bottom")+
theme(legend.title=element_blank()) +
guides(color = guide_legend(nrow = 2, byrow = TRUE))
ggsave("plots/plot_samplesize.svg",plot_acc_contr_sample_size, height = 4)
# Combine both plots (Accuracy with risk of bias and Sample size and balanced accuracy)
plot_combined <- plot_acc_contr_violin_rob + plot_acc_contr_sample_size + plot_annotation(tag_levels = "A") + plot_layout(ncol = 2)
plot_combined
cairo_pdf("plots/figure_2_acc_sample_size.pdf", height = 3.5)
plot_combined
dev.off()
## png
## 2
ggsave("plots/figure_2_acc_sample_size.svg",plot_combined, height = 3.5)
data_imp_features_clean <- data_imp_features[!(is.na(data_imp_features$Features_with_high_predictive_value)),]
12 out of 13 studies made a statement on feature importance.
# Reduce to columns needed
data_imp_features_table <- data_imp_features_clean[,c("Study","Way_of_measuring_predictive_value", "Way_of_measuring_predictive_value_categories")]
colnames(data_imp_features_table)[colnames(data_imp_features_table) == "Way_of_measuring_predictive_value_categories"] <- "Category"
# Turn hyphen into space
colnames(data_imp_features_table) <- gsub("_"," ",colnames(data_imp_features_table))
# Flextable
set_flextable_defaults(font.family = "Arial",
font.size = 8,
padding.bottom = 3,
padding.top = 3,
padding.left = 0.5,
paddings.right = 0.5,
#theme_fun = "theme_apa",
theme_fun = NULL,
text.align = "center",
line_spacing = 1)
ft <- flextable(data_imp_features_table)
ft <- set_table_properties(ft, layout = "autofit")
ft <- autofit(ft)
flextable::save_as_docx(ft, path = "plots/RQ2.1_table.docx", pr_section = format_table_portrait)
# Add new rows when two categories were chosen
data_imp_features_clean_way <- data_imp_features_clean
for (row_idx in 1:nrow(data_imp_features_clean)){
entry <- data_imp_features_clean_way$Way_of_measuring_predictive_value_categories[row_idx]
if (grepl(";",entry)){
categories <- strsplit(entry,"; ")[[1]]
# Copy row
data_imp_features_clean_way <- rbind(data_imp_features_clean_way,data_imp_features_clean_way[row_idx,])
# Replace category in old row
data_imp_features_clean_way$Way_of_measuring_predictive_value_categories[row_idx]<-categories[1]
# Replace category in new row
data_imp_features_clean_way$Way_of_measuring_predictive_value_categories[nrow(data_imp_features_clean_way)]<-categories[2]
}
}
# Plot data
plot_measure_imp <- ggplot2::ggplot(data = data_imp_features_clean_way,aes(x = `Way_of_measuring_predictive_value_categories`, color = `Study`))+
geom_bar() +
theme(legend.position = "none")+
xlab("")+
coord_flip()+
ggtitle("Ways to measure high predictive value")
library(plotly)
ggplotly(plot_measure_imp)
# Reduce_to_columns_needed
data_imp_features_level_table <- data_imp_features_clean[,c("Study","Type_of_FC-based_input_features","Features_with_high_predictive_value","Resolution_of_reporting_features_with_high_predictive_value")]
# Change colnames
colnames(data_imp_features_table)[colnames(data_imp_features_table) == "Type_of_FC-based_input_features"] <- "FC-based input features"
# Turn hyphen into space
colnames(data_imp_features_level_table) <- gsub("_"," ",colnames(data_imp_features_level_table))
# Remove unnecessary information
data_imp_features_level_table$`Type of FC-based input features` <- gsub("[(]wb:.*)","",data_imp_features_level_table$`Type of FC-based input features`)
ft <- flextable(data_imp_features_level_table)
ft <- set_table_properties(ft, layout = "autofit")
ft <- autofit(ft)
flextable::save_as_docx(ft, path = "plots/RQ2.2_resolution.docx", pr_section = format_table)
# Get column names
cols_tested <- colnames(data_imp_features_clean )[endsWith(colnames(data_imp_features_clean ),"_tested")]
cols_important <- colnames(data_imp_features_clean )[endsWith(colnames(data_imp_features_clean ),"_important")]
# First: Reshape tested-columns
data_imp_features_clean$ID = rownames(data_imp_features_clean)
data_ROIs_tested_long <- reshape(data = data_imp_features_clean,idvar = "Study", new.row.names = 1:20000,
varying = cols_tested, v.name = "tested", times = cols_tested, drop = cols_important, direction = "long")
colnames(data_ROIs_tested_long)[colnames(data_ROIs_tested_long)=="time"] <- 'ROI'
data_ROIs_tested_long$ROI <- gsub("_tested","",data_ROIs_tested_long$ROI)
data_ROIs_tested_long$ID <- paste(data_ROIs_tested_long$`Study`,data_ROIs_tested_long$ROI)
# Second: Reshape important-columns
data_ROIs_important_long <- reshape(data = data_imp_features_clean , idvar = "ID",new.row.names = 1:20000,
varying = cols_important, v.name = "important", times = cols_important, direction = "long", drop = cols_tested)
data_ROIs_important_long$ROI <- gsub("_important","",data_ROIs_important_long$time)
data_ROIs_important_long$ID <- paste(data_ROIs_important_long$`Study`,data_ROIs_important_long$ROI)
# Third: Merge both data sets
data_ROIs <- merge(data_ROIs_tested_long,data_ROIs_important_long)
data_ROIs$time <- gsub("_important","",data_ROIs$time)
colnames(data_ROIs)[colnames(data_ROIs)=="time"] <- 'Region'
# Exclude ROIs, when they were not tested
data_ROIs_only_tested <- data_ROIs[!c(data_ROIs$tested == "n"|is.na(data_ROIs$tested)|data_ROIs$tested == "NA"),]
# Convert to factor
data_ROIs_only_tested$important <- as.factor(data_ROIs_only_tested$important)
data_ROIs_only_tested$Region <-
gsub("_"," ",data_ROIs_only_tested$Region)
# Wide data set with absolute and relative freq per region
freq_all <- as.data.frame(table(data_ROIs_only_tested$Region))
data_ROIs_only_tested_important <- data_ROIs_only_tested[data_ROIs_only_tested$important=="y",]
data_ROIs_only_tested_nonimportant <- data_ROIs_only_tested[data_ROIs_only_tested$important=="n",]
freq_important <- as.data.frame(table(data_ROIs_only_tested_important$Region))
colnames(freq_important) <- c("Var1","Abs_frequency")
freq_nonimportant <- as.data.frame(table(data_ROIs_only_tested_nonimportant$Region))
colnames(freq_nonimportant) <- c("Var1","Freq_nonimportant")
freq_all <- merge(freq_all,freq_important,by= "Var1")
freq_all <- merge(freq_all,freq_nonimportant,by= "Var1")
freq_all$Rel_frequency <- round((freq_all$Abs_frequency/freq_all$Freq)*100)
# Get features with relative frequencies above 30% and order them according to frequency
temp_dataset <- freq_all[freq_all$Rel_frequency>30, c("Var1","Rel_frequency")]
temp_dataset <- arrange(temp_dataset,Rel_frequency)
Regions_high_freq <- temp_dataset$Var1
data_ROIs_only_tested$Region <- factor(data_ROIs_only_tested$Region, levels = Regions_high_freq)
This plot shows whose connectivities were particularily important for the prediction of TR.
# Reduce data set to only high frequency regions
data_ROIs_only_tested_high_freq <- data_ROIs_only_tested[data_ROIs_only_tested$Region %in% Regions_high_freq,]
plot_feat_pred_value <- ggplot2::ggplot(data = data_ROIs_only_tested_high_freq, aes(x =`Region`, fill= `important`))+
geom_bar()+
#theme(axis.text.x = element_text(angle = 90))+
scale_fill_brewer(palette = "Oranges")+
geom_text(aes(label = scales::percent(..count../tapply(..count.., ..x.. ,sum)[..x..])),stat = "count", position= position_stack(vjust = 0.5))+
xlab("")+
coord_flip()
#https://stackoverflow.com/questions/55680449/ggplot-filled-barplot-with-percentage-labels
# Change labels
scaleFUN <- function(x) x*100
legend_predvalue_eng <- c("no predictive value", "predictive value")
# Plot
plot_feat_pred_value_2 <- ggplot(data = data_ROIs_only_tested_high_freq, aes(x =`Region`, fill= `important`))+
geom_bar(aes (y = ..count../tapply(..count.., ..x.. ,sum)[..x..]))+
#theme(axis.text.x = element_text(angle = 90))+
scale_fill_brewer(palette = "Oranges", labels = legend_predvalue_eng)+
scale_y_continuous(labels=scaleFUN)+
geom_text(stat = "count", aes(label = paste0("n=",..count..)), position = position_fill(vjust =0.5))+
xlab("")+
ylab("Relative Frequency in %")+
theme(legend.title=element_blank(),legend.position = "right") +
#theme (plot.margin=unit (c (8,5.5,5.5,5.5),units = "pt"))+
#scale_fill_manual()+
coord_flip()
plot_feat_pred_value_2
# Save plot
cairo_pdf("plots/figure_3_predictive_value.pdf", height = 3.5)
plot_feat_pred_value_2
dev.off()
## png
## 2
ggsave("plots/figure_3_predictive_value.svg",plot_feat_pred_value_2,height = 4)
# Reduce to important columns
data_dimreduction_table <- data_dimreduction[, !(names(data_dimreduction) %in% c("Year", "Comments")), drop = FALSE]
colnames(data_dimreduction_table) <- gsub("_"," ",colnames(data_dimreduction_table))
# Flextable
set_flextable_defaults(font.family = "Arial",
font.size = 8,
padding.bottom = 3,
padding.top = 3,
padding.left = 0.5,
paddings.right = 0.5,
#theme_fun = "theme_apa",
theme_fun = NULL,
text.align = "center",
line_spacing = 1)
ft <- flextable(data_dimreduction_table)
ft <- rotate(ft, j = 3:9, rotation = "btlr", part = "header", align = "center")
ft <- height(ft, height = 1.5, part = "header")
ft <- hrule(ft, rule = "exact", part = "header")
ft <- set_table_properties(ft, layout = "autofit")
flextable::save_as_docx(ft, path = "plots/RQ3_table.docx", pr_section = format_table_portrait)
# Bring data into longformat
cols_reduction <- colnames(data_dimreduction)[4:9]
data_dimreduction_long <- reshape(data = data_dimreduction, idvar = "Study",varying = cols_reduction, v.name = "applied", times = cols_reduction, new.row.names = 1:1000, direction = "long")
# Plot for feature generation
plot_reduction_generation <- ggplot(data = data_dimreduction_long[(!(is.na(data_dimreduction_long$applied))& data_dimreduction_long$time %in% c("atlas-based_parcellation", "data-driven_parcellation", "theory-based_ROI-/connectivity-selection")),],aes(x = `time`, color = `Study`))+
geom_bar() +
coord_flip() +
xlab("")+
theme(legend.position = "none", plot.title = element_text(size =14))+
ggtitle("Feature generation")
ggplotly(plot_reduction_generation)
# Plot for feature extraction
cols_feat_extract <- colnames(data_dimreduction)[7:9]
plot_reduction_extraction <- ggplot2::ggplot(data = data_dimreduction_long[(!(is.na(data_dimreduction_long$applied))& data_dimreduction_long$time %in% cols_feat_extract),],aes(x = `time`, color = `Study`))+
geom_bar() +
theme(legend.position = "none", plot.title = element_text(size=14))+
coord_flip() +
xlab("")+
ggtitle("Feature selection")
ggplotly(plot_reduction_extraction)
# Prepare dataset
# Remove columns that contain "comments"
cols_no_comments <- colnames(data_PROBAST)[!grepl("comments",colnames(data_PROBAST))]
data_PROBAST_table <- data_PROBAST[,cols_no_comments]
# Transpose table
PROBAST_transposed <- t(data_PROBAST_table)
PROBAST_transposed <- as.data.frame(PROBAST_transposed)
colnames(PROBAST_transposed) <- PROBAST_transposed[1, ]
PROBAST_transposed <- PROBAST_transposed[-1, ]
# Use rownames as first column (as flextable ignores rownames)
PROBAST_transposed <- PROBAST_transposed %>%
tibble::rownames_to_column(var = "Question")
# Flextable settings
set_flextable_defaults(font.family = "Arial",
font.size = 8,
padding.bottom = 3,
padding.top = 3,
padding.left = 0.5,
paddings.right = 0.5,
#theme_fun = "theme_apa",
theme_fun = NULL,
text.align = "center",
line_spacing = 1)
# Create flextable
ft <- flextable(PROBAST_transposed)
ft <- set_table_properties(ft, width = 1, layout = "autofit")
ft <- autofit(ft)
flextable::save_as_docx(ft, path = "plots/PROBAST_table.docx", pr_section = format_table)
# Final rating columns
cols_final_rating <- c("Final rating domain 1 (risk of bias)", "Final rating domain 2 (risk of bias)", "Final rating domain 3 (risk of bias)", "Final rating domain 4 (risk of bias)", "Final rating total (risk of bias)")
data_PROBAST_plot <- data_PROBAST[c("Study",cols_final_rating)]
# Bring data into long-format
data_PROBAST_plot_long <- reshape(data = data_PROBAST_plot,idvar = "Study", new.row.names = 1:20000,varying = cols_final_rating, v.name = "ROB", times = cols_final_rating, direction = "long")
colnames(data_PROBAST_plot_long)[colnames(data_PROBAST_plot_long)=="time"] <- 'Rating_domain'
# Order and rename factor rating_domain
PROBAST_domains_eng <- c("ROB Sample","ROB Predictors", "ROB Outcome", "ROB Analysis", "ROB Total")
data_PROBAST_plot_long$Rating_domain <- factor(data_PROBAST_plot_long$Rating_domain,
levels = cols_final_rating, labels = PROBAST_domains_eng)
scaleFUN <- function(x) x*100
# Prepare final rating domains labels to make one label bold
breaks <- levels(data_PROBAST_plot_long$Rating_domain)
labels <- as.expression(breaks)
labels[[5]] <- bquote(bold(.(labels[[5]])))
# Plot data
legend_PROBAST_eng <- c("high", "low")
plot_PROBAST <- ggplot(data = data_PROBAST_plot_long,aes(x = `Rating_domain`, fill = `ROB`))+ geom_bar(aes (alpha = Rating_domain == "ROB Gesamt", y = ..count../tapply(..count.., ..x.. ,sum)[..x..]))+
coord_flip()+
scale_fill_manual(values = c("red3","chartreuse3"), name = "Risk of bias (ROB)", labels = legend_PROBAST_eng <- c("high", "low")) +
scale_y_continuous(labels=scaleFUN)+
scale_alpha_manual(values = c("TRUE" = 1, "FALSE" = 0.6), guide = F)+
scale_x_discrete(label = labels, breaks = breaks)+
theme(legend.position = "top")+
xlab("")+
ylab("Relative Accuracy in %")
# Save plot
plot_PROBAST
ggsave("plots/figure_S1_PROBAST.svg",plot_PROBAST, height = 4.5, width = 9)